Python 3.11.3 (v3.11.3:f3909b8bc8, Apr 4 2023, 20:12:10) [Clang 13.0.0 (clang-1300.0.29.30)]
Type 'copyright', 'credits' or 'license' for more information
IPython 8.26.0 -- An enhanced Interactive Python. Type '?' for help.
for col in outlier_columns:
dataset = mark_outliers_chauvenet(df, col)
plot_binary_outliers(dataset=dataset, col=col, outlier_col=col + "_outlier", reset_index=True)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) /Users/bogdanduminica/Desktop/tracking-barbell-exercises/src/features/remove_outliers.py in line 1 ----> <a href='file:///Users/bogdanduminica/Desktop/tracking-barbell-exercises/src/features/remove_outliers.py?line=184'>185</a> for col in outlier_columns: <a href='file:///Users/bogdanduminica/Desktop/tracking-barbell-exercises/src/features/remove_outliers.py?line=185'>186</a> dataset = mark_outliers_chauvenet(df, col) <a href='file:///Users/bogdanduminica/Desktop/tracking-barbell-exercises/src/features/remove_outliers.py?line=186'>187</a> plot_binary_outliers(dataset=dataset, col=col, outlier_col=col + "_outlier", reset_index=True) NameError: name 'outlier_columns' is not defined
outlier_columns = list(df.columns[:6])
--------------------------------------------------------------------------- NameError Traceback (most recent call last) /Users/bogdanduminica/Desktop/tracking-barbell-exercises/src/features/remove_outliers.py in line 1 ----> <a href='file:///Users/bogdanduminica/Desktop/tracking-barbell-exercises/src/features/remove_outliers.py?line=13'>14</a> outlier_columns = list(df.columns[:6]) NameError: name 'df' is not defined
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import scipy
from sklearn.neighbors import LocalOutlierFactor # pip install scikit-learn
# --------------------------------------------------------------
# Load data
# --------------------------------------------------------------
df = pd.read_pickle("../../data/interim/processed_data_01.pkl")
outlier_columns = list(df.columns[:6])
# --------------------------------------------------------------
# Plotting outliers
# --------------------------------------------------------------
#print(plt.style.available)
plt.style.use("fivethirtyeight")
plt.rcParams["figure.figsize"] = (20, 5)
plt.rcParams["figure.dpi"] = 100
df[["acc_x", "label"]].boxplot(by = "label", figsize = (20, 10))
plt.show()
temp = ['acc_x', 'acc_y', 'acc_z']
columns_to_plot = temp[:3] + ["label"]
df[columns_to_plot].boxplot(by="label", figsize=(20, 10), layout = (1, 3))
plt.show()
df[outlier_columns[3:] + ["label"]].boxplot(by = "label", figsize = (20, 10), layout = (1, 3))
plt.show()
def plot_binary_outliers(dataset, col, outlier_col, reset_index):
""" Plot outliers in case of a binary outlier score. Here, the col specifies the real data
column and outlier_col the columns with a binary value (outlier or not).
Args:
dataset (pd.DataFrame): The dataset
col (string): Column that you want to plot
outlier_col (string): Outlier column marked with true/false
reset_index (bool): whether to reset the index for plotting
"""
# Taken from: https://github.com/mhoogen/ML4QS/blob/master/Python3Code/util/VisualizeDataset.py
dataset = dataset.dropna(axis=0, subset=[col, outlier_col])
dataset[outlier_col] = dataset[outlier_col].astype("bool")
if reset_index:
dataset = dataset.reset_index()
fig, ax = plt.subplots()
plt.xlabel("samples")
plt.ylabel("value")
# Plot non outliers in default color
ax.plot(
dataset.index[~dataset[outlier_col]],
dataset[col][~dataset[outlier_col]],
"+",
)
# Plot data points that are outliers in red
ax.plot(
dataset.index[dataset[outlier_col]],
dataset[col][dataset[outlier_col]],
"r+",
)
plt.legend(
["outlier " + col, "no outlier " + col],
loc="upper center",
ncol=2,
fancybox=True,
shadow=True,
)
plt.show()
# --------------------------------------------------------------
# Interquartile range (distribution based)
# --------------------------------------------------------------
# Insert IQR function
def mark_outliers_iqr(dataset, col):
"""Function to mark values as outliers using the IQR method.
Args:
dataset (pd.DataFrame): The dataset
col (string): The column you want apply outlier detection to
Returns:
pd.DataFrame: The original dataframe with an extra boolean column
indicating whether the value is an outlier or not.
"""
dataset = dataset.copy()
Q1 = dataset[col].quantile(0.25)
Q3 = dataset[col].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR
dataset[col + "_outlier"] = (dataset[col] < lower_bound) | (
dataset[col] > upper_bound
)
return dataset
# Plot a single column
col = "acc_x"
dataset = mark_outliers_iqr(df, col)
plot_binary_outliers(dataset=dataset, col=col, outlier_col=col + "_outlier", reset_index=True)
# Loop over all columns
for col in outlier_columns:
dataset = mark_outliers_iqr(df, col)
plot_binary_outliers(dataset=dataset, col=col, outlier_col=col + "_outlier", reset_index=True)
# --------------------------------------------------------------
# Chauvenets criteron (distribution based)
# --------------------------------------------------------------
# Check for normal distribution
df[outlier_columns[:3] + ["label"]].plot.hist(by = "label", figsize = (20, 10), layout = (3, 3))
plt.show()
df[outlier_columns[3:] + ["label"]].plot.hist(by = "label", figsize = (20, 10), layout = (3, 3))
plt.show()
# Insert Chauvenet's function
# it assumes a normal distribution of the data
def mark_outliers_chauvenet(dataset, col, C=2):
"""Finds outliers in the specified column of datatable and adds a binary column with
the same name extended with '_outlier' that expresses the result per data point.
Taken from: https://github.com/mhoogen/ML4QS/blob/master/Python3Code/Chapter3/OutlierDetection.py
Args:
dataset (pd.DataFrame): The dataset
col (string): The column you want apply outlier detection to
C (int, optional): Degree of certainty for the identification of outliers given the assumption
of a normal distribution, typicaly between 1 - 10. Defaults to 2.
Returns:
pd.DataFrame: The original dataframe with an extra boolean column
indicating whether the value is an outlier or not.
"""
dataset = dataset.copy()
# Compute the mean and standard deviation.
mean = dataset[col].mean()
std = dataset[col].std()
N = len(dataset.index)
criterion = 1.0 / (C * N)
# Consider the deviation for the data points.
deviation = abs(dataset[col] - mean) / std
# Express the upper and lower bounds.
low = -deviation / math.sqrt(C)
high = deviation / math.sqrt(C)
prob = []
mask = []
# Pass all rows in the dataset.
for i in range(0, len(dataset.index)):
# Determine the probability of observing the point
prob.append(
1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
)
# And mark as an outlier when the probability is below our criterion.
mask.append(prob[i] < criterion)
dataset[col + "_outlier"] = mask
return dataset
for col in outlier_columns:
dataset = mark_outliers_chauvenet(df, col)
plot_binary_outliers(dataset=dataset, col=col, outlier_col=col + "_outlier", reset_index=True)
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
def mark_outliers_lof(dataset, columns, n=20):
"""Mark values as outliers using LOF
Args:
dataset (pd.DataFrame): The dataset
col (string): The column you want apply outlier detection to
n (int, optional): n_neighbors. Defaults to 20.
Returns:
pd.DataFrame: The original dataframe with an extra boolean column
indicating whether the value is an outlier or not.
"""
dataset = dataset.copy()
lof = LocalOutlierFactor(n_neighbors=n)
data = dataset[columns]
outliers = lof.fit_predict(data)
X_scores = lof.negative_outlier_factor_
dataset["outlier_lof"] = outliers == -1
return dataset, outliers, X_scores
dataset, outliers, X_scores = mark_outliers_lof(df, outlier_columns)
dataset
| acc_x | acc_y | acc_z | gyr_x | gyr_y | gyr_z | label | category | participant | set | outlier_lof | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| epoch (ms) | |||||||||||
| 2019-01-11 15:08:05.200 | 0.013500 | 0.977000 | -0.071000 | -1.8904 | 2.4392 | 0.9388 | bench | heavy | B | 64 | False |
| 2019-01-11 15:08:05.400 | -0.001500 | 0.970500 | -0.079500 | -1.6826 | -0.8904 | 2.1708 | bench | heavy | B | 64 | False |
| 2019-01-11 15:08:05.600 | 0.001333 | 0.971667 | -0.064333 | 2.5608 | -0.2560 | -1.4146 | bench | heavy | B | 64 | False |
| 2019-01-11 15:08:05.800 | -0.024000 | 0.957000 | -0.073500 | 8.0610 | -4.5244 | -2.0730 | bench | heavy | B | 64 | False |
| 2019-01-11 15:08:06.000 | -0.028000 | 0.957667 | -0.115000 | 2.4390 | -1.5486 | -3.6098 | bench | heavy | B | 64 | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2019-01-20 17:33:27.000 | -0.048000 | -1.041500 | -0.076500 | 1.4146 | -5.6218 | 0.2926 | row | medium | E | 71 | False |
| 2019-01-20 17:33:27.200 | -0.037000 | -1.030333 | -0.053333 | -2.7684 | -0.5854 | 2.2440 | row | medium | E | 71 | False |
| 2019-01-20 17:33:27.400 | -0.060000 | -1.031000 | -0.082000 | 2.8416 | -5.1342 | -0.1220 | row | medium | E | 71 | False |
| 2019-01-20 17:33:27.600 | -0.038667 | -1.025667 | -0.044667 | -0.2318 | 0.2562 | 1.1220 | row | medium | E | 71 | False |
| 2019-01-20 17:33:27.800 | -0.044000 | -1.034000 | -0.059000 | 1.0980 | -4.0240 | 0.9760 | row | medium | E | 71 | False |
9009 rows × 11 columns
outliers
array([1, 1, 1, ..., 1, 1, 1])
X_scores
array([-1.02773582, -1.07699558, -1.15029777, ..., -0.99433498,
-0.98962988, -0.98354443])
for col in outlier_columns:
plot_binary_outliers(dataset=dataset, col=col, outlier_col="outlier_lof", reset_index=True)
label = "bench"
for col in outlier_columns:
dataset = mark_outliers_iqr(df[df["label"] == label], col)
plot_binary_outliers(dataset, col, col + "_outlier", reset_index=True)
label = "squat"
for col in outlier_columns:
dataset = mark_outliers_iqr(df[df["label"] == label], col)
plot_binary_outliers(dataset, col, col + "_outlier", reset_index=True)
label = "bench"
for col in outlier_columns:
dataset = mark_outliers_chauvenet(df[df["label"] == label], col)
plot_binary_outliers(dataset, col, col + "_outlier", reset_index=True)
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
label = "bench"
for col in outlier_columns:
dataset = mark_outliers_chauvenet(df[df["label"] == label], col)
plot_binary_outliers(dataset, col, col + "_outlier", reset_index=True)
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
label = "bench"
for col in outlier_columns:
dataset = mark_outliers_lof(df[df["label"] == label], col)
plot_binary_outliers(dataset, col, col + "_outlier", reset_index=True)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) /Users/bogdanduminica/Desktop/tracking-barbell-exercises/src/features/remove_outliers.py in line 2 <a href='file:///Users/bogdanduminica/Desktop/tracking-barbell-exercises/src/features/remove_outliers.py?line=245'>246</a> for col in outlier_columns: ----> <a href='file:///Users/bogdanduminica/Desktop/tracking-barbell-exercises/src/features/remove_outliers.py?line=246'>247</a> dataset = mark_outliers_lof(df[df["label"] == label], col) <a href='file:///Users/bogdanduminica/Desktop/tracking-barbell-exercises/src/features/remove_outliers.py?line=247'>248</a> plot_binary_outliers(dataset, col, col + "_outlier", reset_index=True) /Users/bogdanduminica/Desktop/tracking-barbell-exercises/src/features/remove_outliers.py in line 14, in mark_outliers_lof(dataset, columns, n) <a href='file:///Users/bogdanduminica/Desktop/tracking-barbell-exercises/src/features/remove_outliers.py?line=206'>207</a> lof = LocalOutlierFactor(n_neighbors=n) <a href='file:///Users/bogdanduminica/Desktop/tracking-barbell-exercises/src/features/remove_outliers.py?line=207'>208</a> data = dataset[columns] ---> <a href='file:///Users/bogdanduminica/Desktop/tracking-barbell-exercises/src/features/remove_outliers.py?line=208'>209</a> outliers = lof.fit_predict(data) <a href='file:///Users/bogdanduminica/Desktop/tracking-barbell-exercises/src/features/remove_outliers.py?line=209'>210</a> X_scores = lof.negative_outlier_factor_ <a href='file:///Users/bogdanduminica/Desktop/tracking-barbell-exercises/src/features/remove_outliers.py?line=210'>211</a> dataset["outlier_lof"] = outliers == -1 File /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py:256, in LocalOutlierFactor.fit_predict(self, X, y) <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=231'>232</a> """Fit the model to the training set X and return the labels. <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=232'>233</a> <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=233'>234</a> **Not available for novelty detection (when novelty is set to True).** (...) <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=249'>250</a> Returns -1 for anomalies/outliers and 1 for inliers. <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=250'>251</a> """ <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=252'>253</a> # As fit_predict would be different from fit.predict, fit_predict is <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=253'>254</a> # only available for outlier detection (novelty=False) --> <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=255'>256</a> return self.fit(X)._predict() File /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/base.py:1473, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs) <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/base.py?line=1465'>1466</a> estimator._validate_params() <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/base.py?line=1467'>1468</a> with config_context( <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/base.py?line=1468'>1469</a> skip_parameter_validation=( <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/base.py?line=1469'>1470</a> prefer_skip_nested_validation or global_skip_validation <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/base.py?line=1470'>1471</a> ) <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/base.py?line=1471'>1472</a> ): -> <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/base.py?line=1472'>1473</a> return fit_method(estimator, *args, **kwargs) File /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py:279, in LocalOutlierFactor.fit(self, X, y) <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=257'>258</a> @_fit_context( <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=258'>259</a> # LocalOutlierFactor.metric is not validated yet <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=259'>260</a> prefer_skip_nested_validation=False <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=260'>261</a> ) <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=261'>262</a> def fit(self, X, y=None): <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=262'>263</a> """Fit the local outlier factor detector from the training dataset. <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=263'>264</a> <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=264'>265</a> Parameters (...) <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=276'>277</a> The fitted local outlier factor detector. <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=277'>278</a> """ --> <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=278'>279</a> self._fit(X) <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=280'>281</a> n_samples = self.n_samples_fit_ <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_lof.py?line=281'>282</a> if self.n_neighbors > n_samples: File /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_base.py:517, in NeighborsBase._fit(self, X, y) <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_base.py?line=514'>515</a> else: <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_base.py?line=515'>516</a> if not isinstance(X, (KDTree, BallTree, NeighborsBase)): --> <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_base.py?line=516'>517</a> X = self._validate_data(X, accept_sparse="csr", order="C") <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_base.py?line=518'>519</a> self._check_algorithm_metric() <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/neighbors/_base.py?line=519'>520</a> if self.metric_params is None: File /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/base.py:633, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, cast_to_ndarray, **check_params) <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/base.py?line=630'>631</a> out = X, y <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/base.py?line=631'>632</a> elif not no_val_X and no_val_y: --> <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/base.py?line=632'>633</a> out = check_array(X, input_name="X", **check_params) <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/base.py?line=633'>634</a> elif no_val_X and not no_val_y: <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/base.py?line=634'>635</a> out = _check_y(y, **check_params) File /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/utils/validation.py:1050, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name) <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/utils/validation.py?line=1042'>1043</a> else: <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/utils/validation.py?line=1043'>1044</a> msg = ( <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/utils/validation.py?line=1044'>1045</a> f"Expected 2D array, got 1D array instead:\narray={array}.\n" <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/utils/validation.py?line=1045'>1046</a> "Reshape your data either using array.reshape(-1, 1) if " <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/utils/validation.py?line=1046'>1047</a> "your data has a single feature or array.reshape(1, -1) " <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/utils/validation.py?line=1047'>1048</a> "if it contains a single sample." <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/utils/validation.py?line=1048'>1049</a> ) -> <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/utils/validation.py?line=1049'>1050</a> raise ValueError(msg) <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/utils/validation.py?line=1051'>1052</a> if dtype_numeric and hasattr(array.dtype, "kind") and array.dtype.kind in "USV": <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/utils/validation.py?line=1052'>1053</a> raise ValueError( <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/utils/validation.py?line=1053'>1054</a> "dtype='numeric' is not compatible with arrays of bytes/strings." <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/utils/validation.py?line=1054'>1055</a> "Convert your data to numeric values explicitly instead." <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/utils/validation.py?line=1055'>1056</a> ) ValueError: Expected a 2-dimensional container but got <class 'pandas.core.series.Series'> instead. Pass a DataFrame containing a single row (i.e. single sample) or a single column (i.e. single feature) instead.
dataset, outliers, X_scores = mark_outliers_lof(df, outlier_columns)
for col in outlier_columns:
plot_binary_outliers(dataset=dataset, col=col, outlier_col="outlier_lof", reset_index=True)
col = "gyr_z"
dataset = mark_outliers_chauvenet(df, col = col)
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
dataset
| acc_x | acc_y | acc_z | gyr_x | gyr_y | gyr_z | label | category | participant | set | gyr_z_outlier | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| epoch (ms) | |||||||||||
| 2019-01-11 15:08:05.200 | 0.013500 | 0.977000 | -0.071000 | -1.8904 | 2.4392 | 0.9388 | bench | heavy | B | 64 | False |
| 2019-01-11 15:08:05.400 | -0.001500 | 0.970500 | -0.079500 | -1.6826 | -0.8904 | 2.1708 | bench | heavy | B | 64 | False |
| 2019-01-11 15:08:05.600 | 0.001333 | 0.971667 | -0.064333 | 2.5608 | -0.2560 | -1.4146 | bench | heavy | B | 64 | False |
| 2019-01-11 15:08:05.800 | -0.024000 | 0.957000 | -0.073500 | 8.0610 | -4.5244 | -2.0730 | bench | heavy | B | 64 | False |
| 2019-01-11 15:08:06.000 | -0.028000 | 0.957667 | -0.115000 | 2.4390 | -1.5486 | -3.6098 | bench | heavy | B | 64 | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2019-01-20 17:33:27.000 | -0.048000 | -1.041500 | -0.076500 | 1.4146 | -5.6218 | 0.2926 | row | medium | E | 71 | False |
| 2019-01-20 17:33:27.200 | -0.037000 | -1.030333 | -0.053333 | -2.7684 | -0.5854 | 2.2440 | row | medium | E | 71 | False |
| 2019-01-20 17:33:27.400 | -0.060000 | -1.031000 | -0.082000 | 2.8416 | -5.1342 | -0.1220 | row | medium | E | 71 | False |
| 2019-01-20 17:33:27.600 | -0.038667 | -1.025667 | -0.044667 | -0.2318 | 0.2562 | 1.1220 | row | medium | E | 71 | False |
| 2019-01-20 17:33:27.800 | -0.044000 | -1.034000 | -0.059000 | 1.0980 | -4.0240 | 0.9760 | row | medium | E | 71 | False |
9009 rows × 11 columns
dataset[dataset["gyr_z_outlier"]]
| acc_x | acc_y | acc_z | gyr_x | gyr_y | gyr_z | label | category | participant | set | gyr_z_outlier | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| epoch (ms) | |||||||||||
| 2019-01-14 13:57:41.800 | -0.137000 | 1.495500 | 0.217000 | 33.9146 | 14.6462 | -98.8294 | ohp | heavy | C | 34 | True |
| 2019-01-18 17:22:40.600 | 0.915500 | -0.302500 | -0.047500 | 16.5364 | 30.1952 | 119.8050 | rest | sitting | A | 6 | True |
| 2019-01-18 17:22:40.800 | 0.846667 | -0.645667 | 0.174333 | -49.5610 | 63.7196 | 104.2684 | rest | sitting | A | 6 | True |
| 2019-01-18 17:22:49.600 | 1.363667 | 0.030000 | -0.001333 | -45.7196 | -8.1218 | -168.9514 | rest | sitting | A | 6 | True |
| 2019-01-18 17:22:49.800 | 0.707500 | 0.390500 | 0.181500 | -100.5976 | -33.9634 | -177.6098 | rest | sitting | A | 6 | True |
| 2019-01-18 17:22:52.000 | 0.371333 | 0.580333 | 0.323333 | 44.3416 | 75.4390 | 132.6100 | rest | sitting | A | 6 | True |
| 2019-01-18 17:22:52.200 | 0.744500 | 0.272000 | 0.261500 | 23.2562 | 22.4388 | 196.3294 | rest | sitting | A | 6 | True |
| 2019-01-18 17:22:59.600 | 0.800000 | -0.406333 | 0.121667 | -35.7436 | 28.3414 | 127.8780 | rest | sitting | A | 6 | True |
| 2019-01-18 17:25:40.400 | 0.623500 | -0.247500 | 0.226000 | -5.7562 | 55.8294 | 175.6464 | rest | standing | A | 36 | True |
| 2019-01-18 17:25:40.600 | 0.669333 | -0.957000 | 0.440667 | -48.6584 | 85.9266 | 178.5730 | rest | standing | A | 36 | True |
| 2019-01-18 17:26:04.800 | 1.115000 | -0.792500 | 0.809000 | 91.5732 | 21.1710 | -209.7562 | rest | standing | A | 36 | True |
| 2019-01-18 17:26:05.000 | 0.714000 | -0.101000 | 0.646000 | 98.5000 | 127.2804 | -209.8538 | rest | standing | A | 36 | True |
| 2019-01-18 17:26:07.400 | -0.217667 | 0.465667 | 0.320333 | -108.5002 | -117.7196 | 167.3170 | rest | standing | A | 36 | True |
| 2019-01-18 17:26:07.600 | 0.616000 | -0.205500 | 0.625000 | -207.6098 | -96.8414 | 269.0854 | rest | standing | A | 36 | True |
| 2019-01-18 17:26:07.800 | 1.070333 | -1.238333 | 0.751000 | -79.0244 | -103.5126 | 179.7196 | rest | standing | A | 36 | True |
| 2019-01-18 17:26:09.400 | 0.773667 | -1.072000 | 0.310000 | -8.6950 | -61.7318 | -162.9390 | rest | standing | A | 36 | True |
| 2019-01-18 17:26:09.600 | 1.464000 | -0.902000 | 0.083000 | -128.5364 | -75.0488 | -338.1708 | rest | standing | A | 36 | True |
| 2019-01-18 17:26:09.800 | 0.709333 | -0.014667 | -0.120667 | -235.5244 | -187.9512 | -169.1826 | rest | standing | A | 36 | True |
| 2019-01-18 17:26:11.600 | 0.419000 | 0.175500 | -0.176500 | 171.8048 | 103.7316 | 267.3416 | rest | standing | A | 36 | True |
| 2019-01-18 17:26:11.800 | 1.155333 | -0.807667 | -0.160333 | -81.4878 | 233.1832 | 248.3416 | rest | standing | A | 36 | True |
| 2019-01-19 17:22:40.600 | 0.915500 | -0.302500 | -0.047500 | 16.5364 | 30.1952 | 119.8050 | rest | sitting | A | 62 | True |
| 2019-01-19 17:22:40.800 | 0.846667 | -0.645667 | 0.174333 | -49.5610 | 63.7196 | 104.2684 | rest | sitting | A | 62 | True |
| 2019-01-19 17:22:49.600 | 1.363667 | 0.030000 | -0.001333 | -45.7196 | -8.1218 | -168.9514 | rest | sitting | A | 62 | True |
| 2019-01-19 17:22:49.800 | 0.707500 | 0.390500 | 0.181500 | -100.5976 | -33.9634 | -177.6098 | rest | sitting | A | 62 | True |
| 2019-01-19 17:22:52.000 | 0.371333 | 0.580333 | 0.323333 | 44.3416 | 75.4390 | 132.6100 | rest | sitting | A | 62 | True |
| 2019-01-19 17:22:52.200 | 0.744500 | 0.272000 | 0.261500 | 23.2562 | 22.4388 | 196.3294 | rest | sitting | A | 62 | True |
| 2019-01-19 17:22:59.600 | 0.800000 | -0.406333 | 0.121667 | -35.7436 | 28.3414 | 127.8780 | rest | sitting | A | 62 | True |
| 2019-01-19 17:25:40.400 | 0.623500 | -0.247500 | 0.226000 | -5.7562 | 55.8294 | 175.6464 | rest | standing | A | 68 | True |
| 2019-01-19 17:25:40.600 | 0.669333 | -0.957000 | 0.440667 | -48.6584 | 85.9266 | 178.5730 | rest | standing | A | 68 | True |
| 2019-01-19 17:26:04.800 | 1.115000 | -0.792500 | 0.809000 | 91.5732 | 21.1710 | -209.7562 | rest | standing | A | 68 | True |
| 2019-01-19 17:26:05.000 | 0.714000 | -0.101000 | 0.646000 | 98.5000 | 127.2804 | -209.8538 | rest | standing | A | 68 | True |
| 2019-01-19 17:26:07.400 | -0.217667 | 0.465667 | 0.320333 | -108.5002 | -117.7196 | 167.3170 | rest | standing | A | 68 | True |
| 2019-01-19 17:26:07.600 | 0.616000 | -0.205500 | 0.625000 | -207.6098 | -96.8414 | 269.0854 | rest | standing | A | 68 | True |
| 2019-01-19 17:26:07.800 | 1.070333 | -1.238333 | 0.751000 | -79.0244 | -103.5126 | 179.7196 | rest | standing | A | 68 | True |
| 2019-01-19 17:26:09.400 | 0.773667 | -1.072000 | 0.310000 | -8.6950 | -61.7318 | -162.9390 | rest | standing | A | 68 | True |
| 2019-01-19 17:26:09.600 | 1.464000 | -0.902000 | 0.083000 | -128.5364 | -75.0488 | -338.1708 | rest | standing | A | 68 | True |
| 2019-01-19 17:26:09.800 | 0.709333 | -0.014667 | -0.120667 | -235.5244 | -187.9512 | -169.1826 | rest | standing | A | 68 | True |
| 2019-01-19 17:26:11.600 | 0.419000 | 0.175500 | -0.176500 | 171.8048 | 103.7316 | 267.3416 | rest | standing | A | 68 | True |
| 2019-01-19 17:26:11.800 | 1.155333 | -0.807667 | -0.160333 | -81.4878 | 233.1832 | 248.3416 | rest | standing | A | 68 | True |
| 2019-01-20 17:22:40.600 | 0.915500 | -0.302500 | -0.047500 | 16.5364 | 30.1952 | 119.8050 | rest | sitting | E | 54 | True |
| 2019-01-20 17:22:40.800 | 0.846667 | -0.645667 | 0.174333 | -49.5610 | 63.7196 | 104.2684 | rest | sitting | E | 54 | True |
| 2019-01-20 17:22:49.600 | 1.363667 | 0.030000 | -0.001333 | -45.7196 | -8.1218 | -168.9514 | rest | sitting | E | 54 | True |
| 2019-01-20 17:22:49.800 | 0.707500 | 0.390500 | 0.181500 | -100.5976 | -33.9634 | -177.6098 | rest | sitting | E | 54 | True |
| 2019-01-20 17:22:52.000 | 0.371333 | 0.580333 | 0.323333 | 44.3416 | 75.4390 | 132.6100 | rest | sitting | E | 54 | True |
| 2019-01-20 17:22:52.200 | 0.744500 | 0.272000 | 0.261500 | 23.2562 | 22.4388 | 196.3294 | rest | sitting | E | 54 | True |
| 2019-01-20 17:22:59.600 | 0.800000 | -0.406333 | 0.121667 | -35.7436 | 28.3414 | 127.8780 | rest | sitting | E | 54 | True |
| 2019-01-20 17:25:40.400 | 0.623500 | -0.247500 | 0.226000 | -5.7562 | 55.8294 | 175.6464 | rest | standing | E | 44 | True |
| 2019-01-20 17:25:40.600 | 0.669333 | -0.957000 | 0.440667 | -48.6584 | 85.9266 | 178.5730 | rest | standing | E | 44 | True |
| 2019-01-20 17:26:04.800 | 1.115000 | -0.792500 | 0.809000 | 91.5732 | 21.1710 | -209.7562 | rest | standing | E | 44 | True |
| 2019-01-20 17:26:05.000 | 0.714000 | -0.101000 | 0.646000 | 98.5000 | 127.2804 | -209.8538 | rest | standing | E | 44 | True |
| 2019-01-20 17:26:07.400 | -0.217667 | 0.465667 | 0.320333 | -108.5002 | -117.7196 | 167.3170 | rest | standing | E | 44 | True |
| 2019-01-20 17:26:07.600 | 0.616000 | -0.205500 | 0.625000 | -207.6098 | -96.8414 | 269.0854 | rest | standing | E | 44 | True |
| 2019-01-20 17:26:07.800 | 1.070333 | -1.238333 | 0.751000 | -79.0244 | -103.5126 | 179.7196 | rest | standing | E | 44 | True |
| 2019-01-20 17:26:09.400 | 0.773667 | -1.072000 | 0.310000 | -8.6950 | -61.7318 | -162.9390 | rest | standing | E | 44 | True |
| 2019-01-20 17:26:09.600 | 1.464000 | -0.902000 | 0.083000 | -128.5364 | -75.0488 | -338.1708 | rest | standing | E | 44 | True |
| 2019-01-20 17:26:09.800 | 0.709333 | -0.014667 | -0.120667 | -235.5244 | -187.9512 | -169.1826 | rest | standing | E | 44 | True |
| 2019-01-20 17:26:11.600 | 0.419000 | 0.175500 | -0.176500 | 171.8048 | 103.7316 | 267.3416 | rest | standing | E | 44 | True |
| 2019-01-20 17:26:11.800 | 1.155333 | -0.807667 | -0.160333 | -81.4878 | 233.1832 | 248.3416 | rest | standing | E | 44 | True |
dataset.loc[dataset["gyr_z_outlier"], "gyr_z"] = np.nan
dataset
| acc_x | acc_y | acc_z | gyr_x | gyr_y | gyr_z | label | category | participant | set | gyr_z_outlier | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| epoch (ms) | |||||||||||
| 2019-01-11 15:08:05.200 | 0.013500 | 0.977000 | -0.071000 | -1.8904 | 2.4392 | 0.9388 | bench | heavy | B | 64 | False |
| 2019-01-11 15:08:05.400 | -0.001500 | 0.970500 | -0.079500 | -1.6826 | -0.8904 | 2.1708 | bench | heavy | B | 64 | False |
| 2019-01-11 15:08:05.600 | 0.001333 | 0.971667 | -0.064333 | 2.5608 | -0.2560 | -1.4146 | bench | heavy | B | 64 | False |
| 2019-01-11 15:08:05.800 | -0.024000 | 0.957000 | -0.073500 | 8.0610 | -4.5244 | -2.0730 | bench | heavy | B | 64 | False |
| 2019-01-11 15:08:06.000 | -0.028000 | 0.957667 | -0.115000 | 2.4390 | -1.5486 | -3.6098 | bench | heavy | B | 64 | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2019-01-20 17:33:27.000 | -0.048000 | -1.041500 | -0.076500 | 1.4146 | -5.6218 | 0.2926 | row | medium | E | 71 | False |
| 2019-01-20 17:33:27.200 | -0.037000 | -1.030333 | -0.053333 | -2.7684 | -0.5854 | 2.2440 | row | medium | E | 71 | False |
| 2019-01-20 17:33:27.400 | -0.060000 | -1.031000 | -0.082000 | 2.8416 | -5.1342 | -0.1220 | row | medium | E | 71 | False |
| 2019-01-20 17:33:27.600 | -0.038667 | -1.025667 | -0.044667 | -0.2318 | 0.2562 | 1.1220 | row | medium | E | 71 | False |
| 2019-01-20 17:33:27.800 | -0.044000 | -1.034000 | -0.059000 | 1.0980 | -4.0240 | 0.9760 | row | medium | E | 71 | False |
9009 rows × 11 columns
dataset[dataset["gyr_z_outlier"]] # outliers in dataset
| acc_x | acc_y | acc_z | gyr_x | gyr_y | gyr_z | label | category | participant | set | gyr_z_outlier | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| epoch (ms) | |||||||||||
| 2019-01-14 13:57:41.800 | -0.137000 | 1.495500 | 0.217000 | 33.9146 | 14.6462 | NaN | ohp | heavy | C | 34 | True |
| 2019-01-18 17:22:40.600 | 0.915500 | -0.302500 | -0.047500 | 16.5364 | 30.1952 | NaN | rest | sitting | A | 6 | True |
| 2019-01-18 17:22:40.800 | 0.846667 | -0.645667 | 0.174333 | -49.5610 | 63.7196 | NaN | rest | sitting | A | 6 | True |
| 2019-01-18 17:22:49.600 | 1.363667 | 0.030000 | -0.001333 | -45.7196 | -8.1218 | NaN | rest | sitting | A | 6 | True |
| 2019-01-18 17:22:49.800 | 0.707500 | 0.390500 | 0.181500 | -100.5976 | -33.9634 | NaN | rest | sitting | A | 6 | True |
| 2019-01-18 17:22:52.000 | 0.371333 | 0.580333 | 0.323333 | 44.3416 | 75.4390 | NaN | rest | sitting | A | 6 | True |
| 2019-01-18 17:22:52.200 | 0.744500 | 0.272000 | 0.261500 | 23.2562 | 22.4388 | NaN | rest | sitting | A | 6 | True |
| 2019-01-18 17:22:59.600 | 0.800000 | -0.406333 | 0.121667 | -35.7436 | 28.3414 | NaN | rest | sitting | A | 6 | True |
| 2019-01-18 17:25:40.400 | 0.623500 | -0.247500 | 0.226000 | -5.7562 | 55.8294 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:25:40.600 | 0.669333 | -0.957000 | 0.440667 | -48.6584 | 85.9266 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:26:04.800 | 1.115000 | -0.792500 | 0.809000 | 91.5732 | 21.1710 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:26:05.000 | 0.714000 | -0.101000 | 0.646000 | 98.5000 | 127.2804 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:26:07.400 | -0.217667 | 0.465667 | 0.320333 | -108.5002 | -117.7196 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:26:07.600 | 0.616000 | -0.205500 | 0.625000 | -207.6098 | -96.8414 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:26:07.800 | 1.070333 | -1.238333 | 0.751000 | -79.0244 | -103.5126 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:26:09.400 | 0.773667 | -1.072000 | 0.310000 | -8.6950 | -61.7318 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:26:09.600 | 1.464000 | -0.902000 | 0.083000 | -128.5364 | -75.0488 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:26:09.800 | 0.709333 | -0.014667 | -0.120667 | -235.5244 | -187.9512 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:26:11.600 | 0.419000 | 0.175500 | -0.176500 | 171.8048 | 103.7316 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:26:11.800 | 1.155333 | -0.807667 | -0.160333 | -81.4878 | 233.1832 | NaN | rest | standing | A | 36 | True |
| 2019-01-19 17:22:40.600 | 0.915500 | -0.302500 | -0.047500 | 16.5364 | 30.1952 | NaN | rest | sitting | A | 62 | True |
| 2019-01-19 17:22:40.800 | 0.846667 | -0.645667 | 0.174333 | -49.5610 | 63.7196 | NaN | rest | sitting | A | 62 | True |
| 2019-01-19 17:22:49.600 | 1.363667 | 0.030000 | -0.001333 | -45.7196 | -8.1218 | NaN | rest | sitting | A | 62 | True |
| 2019-01-19 17:22:49.800 | 0.707500 | 0.390500 | 0.181500 | -100.5976 | -33.9634 | NaN | rest | sitting | A | 62 | True |
| 2019-01-19 17:22:52.000 | 0.371333 | 0.580333 | 0.323333 | 44.3416 | 75.4390 | NaN | rest | sitting | A | 62 | True |
| 2019-01-19 17:22:52.200 | 0.744500 | 0.272000 | 0.261500 | 23.2562 | 22.4388 | NaN | rest | sitting | A | 62 | True |
| 2019-01-19 17:22:59.600 | 0.800000 | -0.406333 | 0.121667 | -35.7436 | 28.3414 | NaN | rest | sitting | A | 62 | True |
| 2019-01-19 17:25:40.400 | 0.623500 | -0.247500 | 0.226000 | -5.7562 | 55.8294 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:25:40.600 | 0.669333 | -0.957000 | 0.440667 | -48.6584 | 85.9266 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:26:04.800 | 1.115000 | -0.792500 | 0.809000 | 91.5732 | 21.1710 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:26:05.000 | 0.714000 | -0.101000 | 0.646000 | 98.5000 | 127.2804 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:26:07.400 | -0.217667 | 0.465667 | 0.320333 | -108.5002 | -117.7196 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:26:07.600 | 0.616000 | -0.205500 | 0.625000 | -207.6098 | -96.8414 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:26:07.800 | 1.070333 | -1.238333 | 0.751000 | -79.0244 | -103.5126 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:26:09.400 | 0.773667 | -1.072000 | 0.310000 | -8.6950 | -61.7318 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:26:09.600 | 1.464000 | -0.902000 | 0.083000 | -128.5364 | -75.0488 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:26:09.800 | 0.709333 | -0.014667 | -0.120667 | -235.5244 | -187.9512 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:26:11.600 | 0.419000 | 0.175500 | -0.176500 | 171.8048 | 103.7316 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:26:11.800 | 1.155333 | -0.807667 | -0.160333 | -81.4878 | 233.1832 | NaN | rest | standing | A | 68 | True |
| 2019-01-20 17:22:40.600 | 0.915500 | -0.302500 | -0.047500 | 16.5364 | 30.1952 | NaN | rest | sitting | E | 54 | True |
| 2019-01-20 17:22:40.800 | 0.846667 | -0.645667 | 0.174333 | -49.5610 | 63.7196 | NaN | rest | sitting | E | 54 | True |
| 2019-01-20 17:22:49.600 | 1.363667 | 0.030000 | -0.001333 | -45.7196 | -8.1218 | NaN | rest | sitting | E | 54 | True |
| 2019-01-20 17:22:49.800 | 0.707500 | 0.390500 | 0.181500 | -100.5976 | -33.9634 | NaN | rest | sitting | E | 54 | True |
| 2019-01-20 17:22:52.000 | 0.371333 | 0.580333 | 0.323333 | 44.3416 | 75.4390 | NaN | rest | sitting | E | 54 | True |
| 2019-01-20 17:22:52.200 | 0.744500 | 0.272000 | 0.261500 | 23.2562 | 22.4388 | NaN | rest | sitting | E | 54 | True |
| 2019-01-20 17:22:59.600 | 0.800000 | -0.406333 | 0.121667 | -35.7436 | 28.3414 | NaN | rest | sitting | E | 54 | True |
| 2019-01-20 17:25:40.400 | 0.623500 | -0.247500 | 0.226000 | -5.7562 | 55.8294 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:25:40.600 | 0.669333 | -0.957000 | 0.440667 | -48.6584 | 85.9266 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:26:04.800 | 1.115000 | -0.792500 | 0.809000 | 91.5732 | 21.1710 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:26:05.000 | 0.714000 | -0.101000 | 0.646000 | 98.5000 | 127.2804 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:26:07.400 | -0.217667 | 0.465667 | 0.320333 | -108.5002 | -117.7196 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:26:07.600 | 0.616000 | -0.205500 | 0.625000 | -207.6098 | -96.8414 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:26:07.800 | 1.070333 | -1.238333 | 0.751000 | -79.0244 | -103.5126 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:26:09.400 | 0.773667 | -1.072000 | 0.310000 | -8.6950 | -61.7318 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:26:09.600 | 1.464000 | -0.902000 | 0.083000 | -128.5364 | -75.0488 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:26:09.800 | 0.709333 | -0.014667 | -0.120667 | -235.5244 | -187.9512 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:26:11.600 | 0.419000 | 0.175500 | -0.176500 | 171.8048 | 103.7316 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:26:11.800 | 1.155333 | -0.807667 | -0.160333 | -81.4878 | 233.1832 | NaN | rest | standing | E | 44 | True |
dataset.loc[dataset["gyr_z_outlier"], "gyr_z"] = np.nan
dataset[dataset["gyr_z_outlier"]] # outliers in dataset
| acc_x | acc_y | acc_z | gyr_x | gyr_y | gyr_z | label | category | participant | set | gyr_z_outlier | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| epoch (ms) | |||||||||||
| 2019-01-14 13:57:41.800 | -0.137000 | 1.495500 | 0.217000 | 33.9146 | 14.6462 | NaN | ohp | heavy | C | 34 | True |
| 2019-01-18 17:22:40.600 | 0.915500 | -0.302500 | -0.047500 | 16.5364 | 30.1952 | NaN | rest | sitting | A | 6 | True |
| 2019-01-18 17:22:40.800 | 0.846667 | -0.645667 | 0.174333 | -49.5610 | 63.7196 | NaN | rest | sitting | A | 6 | True |
| 2019-01-18 17:22:49.600 | 1.363667 | 0.030000 | -0.001333 | -45.7196 | -8.1218 | NaN | rest | sitting | A | 6 | True |
| 2019-01-18 17:22:49.800 | 0.707500 | 0.390500 | 0.181500 | -100.5976 | -33.9634 | NaN | rest | sitting | A | 6 | True |
| 2019-01-18 17:22:52.000 | 0.371333 | 0.580333 | 0.323333 | 44.3416 | 75.4390 | NaN | rest | sitting | A | 6 | True |
| 2019-01-18 17:22:52.200 | 0.744500 | 0.272000 | 0.261500 | 23.2562 | 22.4388 | NaN | rest | sitting | A | 6 | True |
| 2019-01-18 17:22:59.600 | 0.800000 | -0.406333 | 0.121667 | -35.7436 | 28.3414 | NaN | rest | sitting | A | 6 | True |
| 2019-01-18 17:25:40.400 | 0.623500 | -0.247500 | 0.226000 | -5.7562 | 55.8294 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:25:40.600 | 0.669333 | -0.957000 | 0.440667 | -48.6584 | 85.9266 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:26:04.800 | 1.115000 | -0.792500 | 0.809000 | 91.5732 | 21.1710 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:26:05.000 | 0.714000 | -0.101000 | 0.646000 | 98.5000 | 127.2804 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:26:07.400 | -0.217667 | 0.465667 | 0.320333 | -108.5002 | -117.7196 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:26:07.600 | 0.616000 | -0.205500 | 0.625000 | -207.6098 | -96.8414 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:26:07.800 | 1.070333 | -1.238333 | 0.751000 | -79.0244 | -103.5126 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:26:09.400 | 0.773667 | -1.072000 | 0.310000 | -8.6950 | -61.7318 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:26:09.600 | 1.464000 | -0.902000 | 0.083000 | -128.5364 | -75.0488 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:26:09.800 | 0.709333 | -0.014667 | -0.120667 | -235.5244 | -187.9512 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:26:11.600 | 0.419000 | 0.175500 | -0.176500 | 171.8048 | 103.7316 | NaN | rest | standing | A | 36 | True |
| 2019-01-18 17:26:11.800 | 1.155333 | -0.807667 | -0.160333 | -81.4878 | 233.1832 | NaN | rest | standing | A | 36 | True |
| 2019-01-19 17:22:40.600 | 0.915500 | -0.302500 | -0.047500 | 16.5364 | 30.1952 | NaN | rest | sitting | A | 62 | True |
| 2019-01-19 17:22:40.800 | 0.846667 | -0.645667 | 0.174333 | -49.5610 | 63.7196 | NaN | rest | sitting | A | 62 | True |
| 2019-01-19 17:22:49.600 | 1.363667 | 0.030000 | -0.001333 | -45.7196 | -8.1218 | NaN | rest | sitting | A | 62 | True |
| 2019-01-19 17:22:49.800 | 0.707500 | 0.390500 | 0.181500 | -100.5976 | -33.9634 | NaN | rest | sitting | A | 62 | True |
| 2019-01-19 17:22:52.000 | 0.371333 | 0.580333 | 0.323333 | 44.3416 | 75.4390 | NaN | rest | sitting | A | 62 | True |
| 2019-01-19 17:22:52.200 | 0.744500 | 0.272000 | 0.261500 | 23.2562 | 22.4388 | NaN | rest | sitting | A | 62 | True |
| 2019-01-19 17:22:59.600 | 0.800000 | -0.406333 | 0.121667 | -35.7436 | 28.3414 | NaN | rest | sitting | A | 62 | True |
| 2019-01-19 17:25:40.400 | 0.623500 | -0.247500 | 0.226000 | -5.7562 | 55.8294 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:25:40.600 | 0.669333 | -0.957000 | 0.440667 | -48.6584 | 85.9266 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:26:04.800 | 1.115000 | -0.792500 | 0.809000 | 91.5732 | 21.1710 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:26:05.000 | 0.714000 | -0.101000 | 0.646000 | 98.5000 | 127.2804 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:26:07.400 | -0.217667 | 0.465667 | 0.320333 | -108.5002 | -117.7196 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:26:07.600 | 0.616000 | -0.205500 | 0.625000 | -207.6098 | -96.8414 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:26:07.800 | 1.070333 | -1.238333 | 0.751000 | -79.0244 | -103.5126 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:26:09.400 | 0.773667 | -1.072000 | 0.310000 | -8.6950 | -61.7318 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:26:09.600 | 1.464000 | -0.902000 | 0.083000 | -128.5364 | -75.0488 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:26:09.800 | 0.709333 | -0.014667 | -0.120667 | -235.5244 | -187.9512 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:26:11.600 | 0.419000 | 0.175500 | -0.176500 | 171.8048 | 103.7316 | NaN | rest | standing | A | 68 | True |
| 2019-01-19 17:26:11.800 | 1.155333 | -0.807667 | -0.160333 | -81.4878 | 233.1832 | NaN | rest | standing | A | 68 | True |
| 2019-01-20 17:22:40.600 | 0.915500 | -0.302500 | -0.047500 | 16.5364 | 30.1952 | NaN | rest | sitting | E | 54 | True |
| 2019-01-20 17:22:40.800 | 0.846667 | -0.645667 | 0.174333 | -49.5610 | 63.7196 | NaN | rest | sitting | E | 54 | True |
| 2019-01-20 17:22:49.600 | 1.363667 | 0.030000 | -0.001333 | -45.7196 | -8.1218 | NaN | rest | sitting | E | 54 | True |
| 2019-01-20 17:22:49.800 | 0.707500 | 0.390500 | 0.181500 | -100.5976 | -33.9634 | NaN | rest | sitting | E | 54 | True |
| 2019-01-20 17:22:52.000 | 0.371333 | 0.580333 | 0.323333 | 44.3416 | 75.4390 | NaN | rest | sitting | E | 54 | True |
| 2019-01-20 17:22:52.200 | 0.744500 | 0.272000 | 0.261500 | 23.2562 | 22.4388 | NaN | rest | sitting | E | 54 | True |
| 2019-01-20 17:22:59.600 | 0.800000 | -0.406333 | 0.121667 | -35.7436 | 28.3414 | NaN | rest | sitting | E | 54 | True |
| 2019-01-20 17:25:40.400 | 0.623500 | -0.247500 | 0.226000 | -5.7562 | 55.8294 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:25:40.600 | 0.669333 | -0.957000 | 0.440667 | -48.6584 | 85.9266 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:26:04.800 | 1.115000 | -0.792500 | 0.809000 | 91.5732 | 21.1710 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:26:05.000 | 0.714000 | -0.101000 | 0.646000 | 98.5000 | 127.2804 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:26:07.400 | -0.217667 | 0.465667 | 0.320333 | -108.5002 | -117.7196 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:26:07.600 | 0.616000 | -0.205500 | 0.625000 | -207.6098 | -96.8414 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:26:07.800 | 1.070333 | -1.238333 | 0.751000 | -79.0244 | -103.5126 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:26:09.400 | 0.773667 | -1.072000 | 0.310000 | -8.6950 | -61.7318 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:26:09.600 | 1.464000 | -0.902000 | 0.083000 | -128.5364 | -75.0488 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:26:09.800 | 0.709333 | -0.014667 | -0.120667 | -235.5244 | -187.9512 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:26:11.600 | 0.419000 | 0.175500 | -0.176500 | 171.8048 | 103.7316 | NaN | rest | standing | E | 44 | True |
| 2019-01-20 17:26:11.800 | 1.155333 | -0.807667 | -0.160333 | -81.4878 | 233.1832 | NaN | rest | standing | E | 44 | True |
removed_outliers_df = df.copy()
for col in outlier_columns:
for label in df["label"].unique():
dataset = mark_outliers_chauvenet(df[df["label"] == label], col)
# replace outliers with nan
dataset.loc[dataset[col + "_outlier"], col] = np.nan
# update column in the original dataset
removed_outliers_df.loc[(removed_outliers_df["label"] == label), col] = dataset[col]
# count how many outliers were removed
outliers_no = len(df) - len(removed_outliers_df[col].dropna())
print(f"Removed {outliers_no} from {col} for {label}")
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
Removed 0 from acc_x for bench Removed 2 from acc_x for ohp Removed 2 from acc_x for squat Removed 4 from acc_x for dead Removed 4 from acc_x for row Removed 4 from acc_x for rest Removed 5 from acc_y for bench Removed 11 from acc_y for ohp Removed 11 from acc_y for squat Removed 11 from acc_y for dead Removed 11 from acc_y for row Removed 11 from acc_y for rest Removed 3 from acc_z for bench Removed 9 from acc_z for ohp Removed 9 from acc_z for squat Removed 10 from acc_z for dead Removed 10 from acc_z for row
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
Removed 10 from acc_z for rest Removed 2 from gyr_x for bench Removed 6 from gyr_x for ohp Removed 7 from gyr_x for squat Removed 13 from gyr_x for dead Removed 13 from gyr_x for row Removed 25 from gyr_x for rest Removed 14 from gyr_y for bench Removed 29 from gyr_y for ohp Removed 38 from gyr_y for squat Removed 52 from gyr_y for dead Removed 62 from gyr_y for row Removed 71 from gyr_y for rest Removed 13 from gyr_z for bench Removed 14 from gyr_z for ohp Removed 26 from gyr_z for squat Removed 40 from gyr_z for dead Removed 40 from gyr_z for row Removed 64 from gyr_z for rest
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
print(f"Removed {outliers_no} from {col} for {label}")
Removed 64 from gyr_z for rest
for col in outlier_columns:
for label in df["label"].unique():
dataset = mark_outliers_chauvenet(df[df["label"] == label], col)
# replace outliers with nan
dataset.loc[dataset[col + "_outlier"], col] = np.nan
# update column in the original dataset
removed_outliers_df.loc[(removed_outliers_df["label"] == label), col] = dataset[col]
# count how many outliers were removed
outliers_no = len(df) - len(removed_outliers_df[col].dropna())
print(f"Removed {outliers_no} from {col} for {label}")
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
Removed 4 from acc_x for bench Removed 4 from acc_x for ohp Removed 4 from acc_x for squat Removed 4 from acc_x for dead Removed 4 from acc_x for row Removed 4 from acc_x for rest Removed 11 from acc_y for bench Removed 11 from acc_y for ohp Removed 11 from acc_y for squat Removed 11 from acc_y for dead Removed 11 from acc_y for row Removed 11 from acc_y for rest Removed 10 from acc_z for bench Removed 10 from acc_z for ohp Removed 10 from acc_z for squat Removed 10 from acc_z for dead
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
Removed 10 from acc_z for row Removed 10 from acc_z for rest Removed 25 from gyr_x for bench Removed 25 from gyr_x for ohp Removed 25 from gyr_x for squat Removed 25 from gyr_x for dead Removed 25 from gyr_x for row Removed 25 from gyr_x for rest Removed 71 from gyr_y for bench Removed 71 from gyr_y for ohp Removed 71 from gyr_y for squat Removed 71 from gyr_y for dead Removed 71 from gyr_y for row Removed 71 from gyr_y for rest Removed 64 from gyr_z for bench Removed 64 from gyr_z for ohp Removed 64 from gyr_z for squat Removed 64 from gyr_z for dead Removed 64 from gyr_z for row Removed 64 from gyr_z for rest
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
removed_outliers_df = df.copy()
for col in outlier_columns:
for label in df["label"].unique():
dataset = mark_outliers_chauvenet(df[df["label"] == label], col)
# replace outliers with nan
dataset.loc[dataset[col + "_outlier"], col] = np.nan
# update column in the original dataset
removed_outliers_df.loc[(removed_outliers_df["label"] == label), col] = dataset[col]
# count how many outliers were removed
outliers_no = len(dataset) - len(removed_outliers_df[col].dropna())
print(f"Removed {outliers_no} from {col} for {label}")
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
Removed -7344 from acc_x for bench Removed -7331 from acc_x for ohp Removed -7397 from acc_x for squat Removed -7474 from acc_x for dead Removed -7588 from acc_x for row Removed -7895 from acc_x for rest Removed -7339 from acc_y for bench Removed -7322 from acc_y for ohp Removed -7388 from acc_y for squat Removed -7467 from acc_y for dead Removed -7581 from acc_y for row Removed -7888 from acc_y for rest Removed -7341 from acc_z for bench Removed -7324 from acc_z for ohp Removed -7390 from acc_z for squat Removed -7468 from acc_z for dead
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
Removed -7582 from acc_z for row Removed -7889 from acc_z for rest Removed -7342 from gyr_x for bench Removed -7327 from gyr_x for ohp Removed -7392 from gyr_x for squat Removed -7465 from gyr_x for dead Removed -7579 from gyr_x for row Removed -7874 from gyr_x for rest Removed -7330 from gyr_y for bench Removed -7304 from gyr_y for ohp Removed -7361 from gyr_y for squat Removed -7426 from gyr_y for dead Removed -7530 from gyr_y for row Removed -7828 from gyr_y for rest Removed -7331 from gyr_z for bench Removed -7319 from gyr_z for ohp Removed -7373 from gyr_z for squat Removed -7438 from gyr_z for dead Removed -7552 from gyr_z for row Removed -7835 from gyr_z for rest
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
for col in outlier_columns:
for label in df["label"].unique():
dataset = mark_outliers_chauvenet(df[df["label"] == label], col)
# replace outliers with nan
dataset.loc[dataset[col + "_outlier"], col] = np.nan
# update column in the original dataset
removed_outliers_df.loc[(removed_outliers_df["label"] == label), col] = dataset[col]
# count how many outliers were removed
outliers_no = len(dataset) - len(dataset[col].dropna())
print(f"Removed {outliers_no} from {col} for {label}")
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
Removed 0 from acc_x for bench Removed 2 from acc_x for ohp Removed 0 from acc_x for squat Removed 2 from acc_x for dead Removed 0 from acc_x for row Removed 0 from acc_x for rest Removed 5 from acc_y for bench Removed 6 from acc_y for ohp Removed 0 from acc_y for squat Removed 0 from acc_y for dead Removed 0 from acc_y for row Removed 0 from acc_y for rest Removed 3 from acc_z for bench Removed 6 from acc_z for ohp Removed 0 from acc_z for squat Removed 1 from acc_z for dead Removed 0 from acc_z for row
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
Removed 0 from acc_z for rest Removed 2 from gyr_x for bench Removed 4 from gyr_x for ohp Removed 1 from gyr_x for squat Removed 6 from gyr_x for dead Removed 0 from gyr_x for row Removed 12 from gyr_x for rest Removed 14 from gyr_y for bench Removed 15 from gyr_y for ohp Removed 9 from gyr_y for squat Removed 14 from gyr_y for dead Removed 10 from gyr_y for row Removed 9 from gyr_y for rest Removed 13 from gyr_z for bench Removed 1 from gyr_z for ohp Removed 12 from gyr_z for squat Removed 14 from gyr_z for dead Removed 0 from gyr_z for row Removed 24 from gyr_z for rest
<ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i])) <ipython-input-3-b374445ec478>:142: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` 1.0 - 0.5 * (scipy.special.erf(high[i]) - scipy.special.erf(low[i]))
removed_outliers_df
| acc_x | acc_y | acc_z | gyr_x | gyr_y | gyr_z | label | category | participant | set | |
|---|---|---|---|---|---|---|---|---|---|---|
| epoch (ms) | ||||||||||
| 2019-01-11 15:08:05.200 | 0.013500 | 0.977000 | -0.071000 | -1.8904 | 2.4392 | 0.9388 | bench | heavy | B | 64 |
| 2019-01-11 15:08:05.400 | -0.001500 | 0.970500 | -0.079500 | -1.6826 | -0.8904 | 2.1708 | bench | heavy | B | 64 |
| 2019-01-11 15:08:05.600 | 0.001333 | 0.971667 | -0.064333 | 2.5608 | -0.2560 | -1.4146 | bench | heavy | B | 64 |
| 2019-01-11 15:08:05.800 | -0.024000 | 0.957000 | -0.073500 | 8.0610 | -4.5244 | -2.0730 | bench | heavy | B | 64 |
| 2019-01-11 15:08:06.000 | -0.028000 | 0.957667 | -0.115000 | 2.4390 | -1.5486 | -3.6098 | bench | heavy | B | 64 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2019-01-20 17:33:27.000 | -0.048000 | -1.041500 | -0.076500 | 1.4146 | -5.6218 | 0.2926 | row | medium | E | 71 |
| 2019-01-20 17:33:27.200 | -0.037000 | -1.030333 | -0.053333 | -2.7684 | -0.5854 | 2.2440 | row | medium | E | 71 |
| 2019-01-20 17:33:27.400 | -0.060000 | -1.031000 | -0.082000 | 2.8416 | -5.1342 | -0.1220 | row | medium | E | 71 |
| 2019-01-20 17:33:27.600 | -0.038667 | -1.025667 | -0.044667 | -0.2318 | 0.2562 | 1.1220 | row | medium | E | 71 |
| 2019-01-20 17:33:27.800 | -0.044000 | -1.034000 | -0.059000 | 1.0980 | -4.0240 | 0.9760 | row | medium | E | 71 |
9009 rows × 10 columns
removed_outliers_df.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 9009 entries, 2019-01-11 15:08:05.200000 to 2019-01-20 17:33:27.800000 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 acc_x 9005 non-null float64 1 acc_y 8998 non-null float64 2 acc_z 8999 non-null float64 3 gyr_x 8984 non-null float64 4 gyr_y 8938 non-null float64 5 gyr_z 8945 non-null float64 6 label 9009 non-null object 7 category 9009 non-null object 8 participant 9009 non-null object 9 set 9009 non-null int64 dtypes: float64(6), int64(1), object(3) memory usage: 774.2+ KB
removed_outliers_df.to_pickle("../../interim/removed_outliers_chauvenet_02.pkl")
--------------------------------------------------------------------------- OSError Traceback (most recent call last) /Users/bogdanduminica/Desktop/tracking-barbell-exercises/src/features/remove_outliers.py in line 1 ----> <a href='file:///Users/bogdanduminica/Desktop/tracking-barbell-exercises/src/features/remove_outliers.py?line=285'>286</a> removed_outliers_df.to_pickle("../../interim/removed_outliers_chauvenet_02.pkl") File /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/util/_decorators.py:333, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs) <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/util/_decorators.py?line=326'>327</a> if len(args) > num_allow_args: <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/util/_decorators.py?line=327'>328</a> warnings.warn( <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/util/_decorators.py?line=328'>329</a> msg.format(arguments=_format_argument_list(allow_args)), <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/util/_decorators.py?line=329'>330</a> FutureWarning, <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/util/_decorators.py?line=330'>331</a> stacklevel=find_stack_level(), <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/util/_decorators.py?line=331'>332</a> ) --> <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/util/_decorators.py?line=332'>333</a> return func(*args, **kwargs) File /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/core/generic.py:3165, in NDFrame.to_pickle(self, path, compression, protocol, storage_options) <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/core/generic.py?line=3114'>3115</a> """ <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/core/generic.py?line=3115'>3116</a> Pickle (serialize) object to file. <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/core/generic.py?line=3116'>3117</a> (...) <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/core/generic.py?line=3160'>3161</a> 4 4 9 <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/core/generic.py?line=3161'>3162</a> """ # noqa: E501 <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/core/generic.py?line=3162'>3163</a> from pandas.io.pickle import to_pickle -> <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/core/generic.py?line=3164'>3165</a> to_pickle( <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/core/generic.py?line=3165'>3166</a> self, <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/core/generic.py?line=3166'>3167</a> path, <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/core/generic.py?line=3167'>3168</a> compression=compression, <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/core/generic.py?line=3168'>3169</a> protocol=protocol, <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/core/generic.py?line=3169'>3170</a> storage_options=storage_options, <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/core/generic.py?line=3170'>3171</a> ) File /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/pickle.py:103, in to_pickle(obj, filepath_or_buffer, compression, protocol, storage_options) <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/pickle.py?line=99'>100</a> if protocol < 0: <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/pickle.py?line=100'>101</a> protocol = pickle.HIGHEST_PROTOCOL --> <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/pickle.py?line=102'>103</a> with get_handle( <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/pickle.py?line=103'>104</a> filepath_or_buffer, <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/pickle.py?line=104'>105</a> "wb", <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/pickle.py?line=105'>106</a> compression=compression, <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/pickle.py?line=106'>107</a> is_text=False, <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/pickle.py?line=107'>108</a> storage_options=storage_options, <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/pickle.py?line=108'>109</a> ) as handles: <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/pickle.py?line=109'>110</a> # letting pickle write directly to the buffer is more memory-efficient <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/pickle.py?line=110'>111</a> pickle.dump(obj, handles.handle, protocol=protocol) File /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/common.py:749, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options) <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/common.py?line=746'>747</a> # Only for write methods <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/common.py?line=747'>748</a> if "r" not in mode and is_path: --> <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/common.py?line=748'>749</a> check_parent_directory(str(handle)) <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/common.py?line=750'>751</a> if compression: <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/common.py?line=751'>752</a> if compression != "zstd": <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/common.py?line=752'>753</a> # compression libraries do not like an explicit text-mode File /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/common.py:616, in check_parent_directory(path) <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/common.py?line=613'>614</a> parent = Path(path).parent <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/common.py?line=614'>615</a> if not parent.is_dir(): --> <a href='file:///Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/io/common.py?line=615'>616</a> raise OSError(rf"Cannot save file into a non-existent directory: '{parent}'") OSError: Cannot save file into a non-existent directory: '../../interim'
removed_outliers_df.to_pickle("../../data/interim/removed_outliers_chauvenet_02.pkl")